# Reproduction script for Kuehn, David and Ingo Rohlfing
# (2016): Are There Really Two Cultures? A Pilot Study on
# the Application of Qualitative and Quantitative Methods in
# Political Science. European Journal of Political Research:
# advance access.

# Version
R.version.string
# >'R version 3.1.0 (2014-04-10)'

# Downloading (if missing) and attaching the required packages.
# requireNamespace() is used for the availability check because
# installed.packages() scans every installed package and is documented as
# unsuitable for testing whether one named package is present.
# Packages are attached in the same order as before so that masking between
# them does not change; dplyr now gets the same install check as the rest.
required_packages <- c("XLConnect", "irr", "ggplot2", "gridExtra", "vcd",
                       "dplyr")
for (pkg in required_packages) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  } else {
    print("The requested library is already installed.")
  }
  library(pkg, character.only = TRUE)
}

# importing and preparing raw data
# All three sheets are read with identical settings: reading starts at row 4,
# stops at column 39, and no header row is used (so columns are named
# Col1, Col2, ... as referenced further down). The sheets are then stacked
# in the order cps3, wp3, ejpr3.
wb.attc <- loadWorkbook("attc_rn.xlsx", create = FALSE)
read_coding_sheet <- function(sheet) {
  readWorksheet(wb.attc, sheet = sheet, startRow = 4, endCol = 39,
                autofitCol = FALSE, header = FALSE)
}
cps3 <- read_coding_sheet("cps3")
wp3 <- read_coding_sheet("wp3")
ejpr3 <- read_coding_sheet("ejpr3")
attc <- rbind(cps3, wp3, ejpr3)

# Inter-coder agreement (unweighted Cohen's kappa) between the rows marked
# "David"/"DK" and "Ingo"/"IR" in Col1, computed over columns Col15-Col39.
attc$Col1[attc$Col1 == "David"] <- "DK"
attc$Col1[attc$Col1 == "Ingo"] <- "IR"
vars <- paste0("Col", 15:39)
DK <- subset(attc, Col1 == "DK", select = vars)
IR <- subset(attc, Col1 == "IR", select = vars)

# Flatten each coder's codes column by column into one long vector.
# NOTE(review): pairing by position assumes both coders' rows appear in the
# same article order in the workbook -- confirm against the data.
l.DK <- unlist(DK, use.names = FALSE)
l.IR <- unlist(IR, use.names = FALSE)

# cbind() would silently recycle the shorter vector and yield a meaningless
# kappa, so fail loudly if the two coders did not code the same number of
# cells.
stopifnot(length(l.DK) == length(l.IR))

# kappa2() expects one row per rated unit and one column per rater.
l.attc <- cbind(l.DK, l.IR)
kappa2(l.attc, "unweighted")

### extracting "final" codes for descriptive  analysis
# Keep only the rows holding the agreed-upon codes; the marker appears with
# both capitalisations in Col1 and is normalised to "final".
attc <- subset(attc, Col1 %in% c("final", "Final"))
attc$Col1 <- "final"
rownames(attc) <- NULL

# A former call `rename(attc, A = Col1, B = Col2)` was dropped here: its
# result was never assigned, and columns 1 and 2 are named explicitly below.

# NOTE(review): this removes every object in the workspace except `attc`,
# including objects that were not created by this script.
rm(list = setdiff(ls(), "attc"))

# Column layout after renaming: 1 = Source, 2 = Random.ID, 3-14 = A..L,
# 15 onwards = item_1 .. item_25.
names(attc)[1:2] <- c("Source", "Random.ID")
names(attc)[3:14] <- LETTERS[1:12]
names(attc)[15:ncol(attc)] <- paste0("item_", 1:25)

# Column L is used as the fill (paradigm) variable in the histograms below.
attc$L <- as.factor(attc$L)

# Convert the 25 item columns to numeric in one step.
attc[15:39] <- lapply(attc[15:39], as.numeric)

### Correlation between selected items (Cramer's V)
# Cross-tabulates two columns of `attc`, given by name, and returns the
# association statistics from vcd::assocstats() for that table.
item_assoc <- function(first, second) {
  crosstab <- table(attc[[first]], attc[[second]])
  assocstats(crosstab)
}

# Items on counterfactuals
item_assoc("item_5", "item_9")
item_assoc("item_5", "item_24")
item_assoc("item_9", "item_24")

# Items on asymmetry
item_assoc("item_22", "item_23")
item_assoc("item_22", "item_25")
item_assoc("item_23", "item_25")

### Descriptive histograms

# Builds the histogram for one item, stacked by column L of `attc`.
#   item    item number; used for the x-axis label ("item <n>") and, by
#           default, to pick the column "item_<n>".
#   title   plot title (character string or expression).
#   labels  x-axis tick labels; ticks are placed at 0, 1, ... in the order
#           given, so two labels give ticks at 0/1 and three at 0/1/2.
#   recoded if TRUE, plot the recoded column "item_<n>rec" instead.
# Returns the ggplot object. The plotted values are copied into a small data
# frame so that aes() maps plain columns rather than indexing `attc` itself.
item_hist <- function(item, title, labels, recoded = FALSE) {
  column <- paste0("item_", item, if (recoded) "rec")
  plot_data <- data.frame(value = attc[[column]], paradigm = attc[["L"]])
  ggplot(plot_data, aes(x = value, fill = paradigm)) +
    geom_histogram(binwidth = 0.5) +
    scale_x_continuous(breaks = seq_along(labels) - 1, labels = labels) +
    xlab(paste("item", item)) +
    ylab("frequency") +
    theme_bw() +
    scale_fill_grey(breaks = c(0, 1),
                    labels = c("quantitative", "qualitative"),
                    name = "paradigm") +
    ggtitle(title)
}

# Moves one special code (99 by default) to the value 2 so that it is drawn
# as a third bar directly next to the 0 and 1 bars.
recode_as_2 <- function(x, code = 99) {
  ifelse(x == code, 2, x)
}

# Tick labels shared by the plain no/yes items.
no_yes <- c("no (0)", "yes (1)")

# Dimension 1
hist01 <- item_hist(1, "Explain outcome in individual case", no_yes)

# Item 2 uses code 3 for "both"; it is plotted at x = 2.
attc$item_2rec <- recode_as_2(attc$item_2, code = 3)
hist02 <- item_hist(2, "Cross-case vs. within-case level",
                    c("cross-case (0)", "within-case (1)", "both (3)"),
                    recoded = TRUE)

hist03 <- item_hist(3, "Causal mechanism analyzed", no_yes)

hist04 <- item_hist(4, "Process tracing performed", no_yes)

attc$item_5rec <- recode_as_2(attc$item_5)
hist05 <- item_hist(5, "Level of counterfactual",
                    c("cross-case (0)", "within-case (1)", "missing (99)"),
                    recoded = TRUE)

grid.arrange(hist01, hist02, hist03, hist04, hist05,
             top = textGrob("Dimension 1: Individual cases",
                            gp = gpar(fontsize = 18, font = 1)))

# Dimension 2
hist06 <- item_hist(6, "Individual variable is at focus",
                    c("yes (0)", "no (1)"))

hist07 <- item_hist(7, "Configurations and interaction terms in model",
                    no_yes)

attc$item_8rec <- recode_as_2(attc$item_8)
hist08 <- item_hist(8, "Causal effect",
                    c("treatment effect (0)", "set logic (1)",
                      "missing (99)"),
                    recoded = TRUE)

attc$item_9rec <- recode_as_2(attc$item_9)
hist09 <- item_hist(9,
                    "Timing of counterfactual relative to causal inference",
                    c("after (0)", "during (1)", "missing (99)"),
                    recoded = TRUE)

hist10 <- item_hist(10, "Equifinality in model", no_yes)

attc$item_11rec <- recode_as_2(attc$item_11)
hist11 <- item_hist(11, "Aggregation in model",
                    c("additive/linear (0)", "set logic (1)",
                      "missing (99)"),
                    recoded = TRUE)

grid.arrange(hist06, hist07, hist08, hist09, hist10, hist11,
             top = textGrob("Dimension 2: Causality and causal models",
                            gp = gpar(fontsize = 18, font = 1)))

# Dimension 3
attc$item_12rec <- recode_as_2(attc$item_12)
hist12 <- item_hist(12, "Scope of causal inferences",
                    c("broad (0)", "narrow (1)", "missing (99)"),
                    recoded = TRUE)

attc$item_13rec <- recode_as_2(attc$item_13)
hist13 <- item_hist(13, "Case selection",
                    c("representative (0)", "substantive (1,1 cell) (1)",
                      "missing (99)"),
                    recoded = TRUE)

hist14 <- item_hist(14, "Selection with no variance on outcome", no_yes)

attc$item_15rec <- recode_as_2(attc$item_15)
hist15 <- item_hist(15, "Data format",
                    c("case-based (0)", "configuration-based (1)",
                      "missing (99)"),
                    recoded = TRUE)

attc$item_16rec <- recode_as_2(attc$item_16)
hist16 <- item_hist(16, "Triangular data",
                    c("transformation (0)", "interpretation (1)",
                      "missing (99)"),
                    recoded = TRUE)

grid.arrange(hist12, hist13, hist14, hist15, hist16,
             top = textGrob("Dimension 3: Populations and data",
                            gp = gpar(fontsize = 18, font = 1)))

# Dimension 4
hist17 <- item_hist(17, "Terminology",
                    c("variables/indicators (0)", "concepts/data (1)"))

hist18 <- item_hist(18, "Ontology",
                    c("unobserved variables (0)",
                      "multidimensional concepts (1)"))

hist19 <- item_hist(19, "Important variation",
                    c("all variation (0)", "zones of variation (1)"))

attc$item_20rec <- recode_as_2(attc$item_20)
hist20 <- item_hist(20, "Variable transformation rationale",
                    c("statistical (0)", "semantic (1)", "missing (99)"),
                    recoded = TRUE)

attc$item_21rec <- recode_as_2(attc$item_21)
hist21 <- item_hist(21, "Categories of typologies",
                    c("mutually exclusive (0)", "overlapping (1)",
                      "missing (99)"),
                    recoded = TRUE)

grid.arrange(hist17, hist18, hist19, hist20, hist21,
             top = textGrob("Dimension 4: Concepts and measurement",
                            gp = gpar(fontsize = 18, font = 1)))

# Dimension 5
hist22 <- item_hist(22, "Explaining 0s different than 1s", no_yes)

hist23 <- item_hist(23, "Concept and its opposite",
                    c("symmetric (0)", "asymmetric (1)"))

# The x-axis of this plot was previously labelled "item 21" although it
# shows item 24; the helper derives the label from the item number.
attc$item_24rec <- recode_as_2(attc$item_24)
hist24 <- item_hist(
  24,
  expression('Counterfactual x'[i]*' -> x'[j]*' different from x'[j]*' -> x'[i]),
  c("no (0)", "yes (1)", "missing (99)"),
  recoded = TRUE
)

hist25 <- item_hist(
  25,
  "Association when exchanging (0,1) and (1,0) cells for 2x2 tables ",
  c("symmetric (0)", "asymmetric (1)")
)

grid.arrange(hist22, hist23, hist24, hist25,
             top = textGrob("Dimension 5: Asymmetry",
                            gp = gpar(fontsize = 18, font = 1)))

### Histograms of patterns in data
# Draws the given plots on one page under a heading set in 18-point type.
# Anything passed after the heading (plots, layout options such as ncol) is
# handed to grid.arrange() unchanged.
titled_grid <- function(heading, ...) {
  heading_grob <- textGrob(heading, gp = gpar(fontsize = 18, font = 1))
  invisible(grid.arrange(..., top = heading_grob))
}

# Empirically irrelevant items
titled_grid("Empirically irrelevant items",
            hist05, hist09, hist16, hist20, hist21, hist24)

# Items that are as expected
titled_grid("Items meeting ATTC expectations",
            hist01, hist02, hist03, hist04, hist13)

# Items that follow quantitative method
titled_grid("Items following quantitative culture",
            hist06, hist14, hist17, hist18, hist19)

# Items that focus on set theory
titled_grid("Items focusing on set theory",
            hist08, hist11, hist15, hist22, hist23, hist25)

# Items showing mixed picture
titled_grid("Items showing mixed picture",
            hist07, hist10, hist12, ncol = 2)
